# -*- coding: utf-8 -*-
# @Time :  15:52
# @Author: blackshark
#@File :educationCrawler.py
#@SoftWare : PyCharm


import requests
from bs4 import BeautifulSoup
import re

# Site root; hrefs and image paths found while crawling are appended to it.
url = "http://foundation.scu.edu.cn"

# Pages that are fetched to discover links to individual articles.
pageUrl = [
    "http://foundation.scu.edu.cn/xwzx/xwdt.htm",
    "http://foundation.scu.edu.cn/xwzx/40.htm",
    "http://foundation.scu.edu.cn/xwzx/39.htm",
]

# Module-level accumulators filled in by the crawl loops further down.
k = 0             # incremented once per article link appended to subUrl
subUrl = []       # article URLs gathered from the pages in pageUrl
title = []        # text of the <h2> in each article's "main" div
information = []  # text of the first <span> in each article's "main" div
passage = []      # text of each article's "v_news_content" div
imgUrl = []       # image URLs gathered from all articles, in crawl order
pagePicNum = []   # number of images collected for each article
picNum = 0        # image counter for the article currently being processed

# Fetch every page in pageUrl and record the URL of each article it links to.
for listing_url in pageUrl:
    # Without a timeout a stalled connection would block the script forever;
    # 10 seconds is an arbitrary but generous bound for a single page.
    response = requests.get(listing_url, timeout=10)
    # Decode using the charset detected from the body rather than the headers.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    for entry in soup.find_all('div', class_='tmbtr fr'):
        # The first two characters of the href are dropped before the site
        # root is prefixed (presumably a leading ".." -- verify against the
        # live markup).
        subUrl.append(url + entry.find('a')['href'][2:])
        k += 1

# Matches the value of an ``orisrc`` attribute in a paragraph's markup.
# Compiled once so the pattern is not re-parsed for every paragraph.
_ORISRC_RE = re.compile(r"orisrc=\"(.+?)\"")

# Exact inline ``style`` values of the paragraphs that are searched for
# images. Both spellings are queried, in this order, so results keep the
# same grouping as before: semicolon form first, then the bare form.
_CENTERED_STYLES = ("text-align: center;", "text-align: center")

# A candidate URL is kept only if it is longer than this many characters.
# NOTE(review): the threshold is carried over unchanged; with the 28-character
# site root it only rejects ``orisrc`` values shorter than 3 characters.
_MIN_IMG_URL_LEN = 30


def _collect_image_urls(content, base_url):
    """Return the image URLs found in the centred paragraphs of *content*.

    Each ``orisrc`` value is prefixed with *base_url* and kept when the result
    is longer than ``_MIN_IMG_URL_LEN``.  Every match becomes its own entry:
    previously all matches inside one paragraph were joined into a single
    string, which produced one unusable URL whenever a paragraph held more
    than one image.

    Args:
        content: Parsed element (BeautifulSoup tag) holding the article body.
        base_url: Prefix prepended to every ``orisrc`` value.

    Returns:
        A list of URL strings, ordered by style spelling and then by the
        order in which ``find_all`` returns the paragraphs.
    """
    found = []
    for style in _CENTERED_STYLES:
        for paragraph in content.find_all('p', style=style):
            for src in _ORISRC_RE.findall(str(paragraph)):
                candidate = base_url + src
                if len(candidate) > _MIN_IMG_URL_LEN:
                    found.append(candidate)
    return found


# Visit every article and record its title, info line, body text and images.
# The result lists stay index-aligned with subUrl: one entry per article.
for article_url in subUrl:
    # Without a timeout a stalled connection would block the script forever;
    # 10 seconds is an arbitrary but generous bound for a single page.
    response = requests.get(article_url, timeout=10)
    # Decode using the charset detected from the body rather than the headers.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    header = soup.find('div', class_='main')
    title.append(header.find('h2').get_text())
    information.append(header.find('span').get_text())

    body = soup.find('div', class_='content').find('div', class_='v_news_content')
    passage.append(body.get_text())

    article_images = _collect_image_urls(body, url)
    imgUrl.extend(article_images)
    # Per-article image count; picNum itself keeps its initial value of 0,
    # which is also what it was reset to at the end of every iteration.
    pagePicNum.append(len(article_images))

















# for i in range(len(subUrl)):
#     print(subUrl[i])
# for i in range(len(title)):
#      print(title[i])
# for i in range(len(information)):
#     print(information[i])
# for i in range(len(passage)):
#     print(passage[0])
# print(len(passage))
# for i in range(len(imgUrl)):
#      print(imgUrl[i])
# for i in range(len(pagePicNum)):
#     print(pagePicNum[i])
# print(len(imgUrl))

